library(ggpubr)
## Loading required package: ggplot2
## Loading required package: magrittr
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 2.1.3 ✔ purrr 0.3.4
## ✔ tidyr 1.1.0 ✔ dplyr 0.8.3
## ✔ readr 1.3.1 ✔ stringr 1.3.1
## ✔ tibble 2.1.3 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::extract() masks magrittr::extract()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::set_names() masks magrittr::set_names()
library(readr)
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ggrepel)
library(ggthemes)
library(DT)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(corrr)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
WRITE_OUTPUT <- FALSE
RUN_RF <- FALSE
source('data_prep_risk_score.R') ##
cr_established <- cor(fh_acs_established, use='pairwise.complete.obs')
new_names <- c('Cancer', 'Arthritis', 'Stroke', 'Chronic Asthma', 'COPD', 'Heart Disease', 'Diabetes', 'Kidney Disease', 'BP Medication', 'Smoking', 'High BP', 'Obesity', 'High Cholesterol', 'Male Over 65', 'Female Over 65')
colnames(cr_established) <- new_names
rownames(cr_established) <- new_names
heatmapColors <- function(numColors=16) {
c1 <- rainbow(numColors,v=seq(0.5,1,length=numColors),s=seq(1,0.3,length=numColors),start=4/6,end=4.0001/6);
c2 <- rainbow(numColors,v=seq(0.5,1,length=numColors),s=seq(1,0.3,length=numColors),start=1/6,end=1.0001/6);
c3 <- c(c1,rev(c2));
return(c3)
}
heatmap.2(cr_established, trace='none',margins = c(10, 10), col=heatmapColors(8))
quantile(abs(cr_established[upper.tri(cr_established)]))
## 0% 25% 50% 75% 100%
## 0.1166857 0.3467861 0.6331859 0.7840348 0.9621185
fh_acs_established_2 <- fh_acs_established
colnames(fh_acs_established_2) <- new_names
cr_established_tidy <- fh_acs_established_2 %>% correlate() %>% stretch()
##
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'
diabetes_related <- c('Heart Disease', 'Diabetes', 'Kidney Disease', 'Stroke')
risk_factors_diabetes <- c('High BP', 'Obesity', 'High Cholesterol')
cr_established_tidy %>% filter(x %in% diabetes_related, y %in% diabetes_related) %>% summarize(quantile_low=quantile(r, na.rm = T, probs=.25), median=median(r, na.rm=T), quantile_high=quantile(r, na.rm=T, probs=.75), avg=mean(abs(r), na.rm=T))
cr_established_tidy %>% filter(x %in% risk_factors_diabetes, y %in% risk_factors_diabetes) %>% summarize(quantile_low=quantile(r, na.rm = T, probs=.25), median=median(r, na.rm=T), quantile_high=quantile(r, na.rm=T, probs=.75), avg=mean(abs(r), na.rm=T))
cr_established_tidy %>% filter(x %in% risk_factors_diabetes, y %in% diabetes_related) %>% summarize(quantile_low=quantile(r, na.rm = T, probs=.25), median=median(r, na.rm=T), quantile_high=quantile(r, na.rm=T, probs=.75), avg=mean(abs(r), na.rm=T))
cr_established_tidy %>% filter(x == 'Chronic Asthma', y == 'COPD')
cr_established_tidy %>% filter(x == 'Smoking', y == 'COPD')
cr_established_tidy %>% filter(x == 'Smoking', y == 'Chronic Asthma')
cr_established_tidy %>% filter(x == 'Obesity') %>% summarize(mean=mean(abs(r), na.rm=T))
cr_established_tidy %>% filter(x == 'Cancer', y %in% c('Male Over 65', 'Female Over 65')) %>% summarize(mean=mean(abs(r), na.rm=T))
remove(fh_acs_established_2)
# number of tracts
num_tracts <- fh_acs[non_na, c("placename", "county_code", "stateabbr", "total_population")]
num_tracts %>% group_by(placename, stateabbr) %>% summarize(number_of_tracts=n(), total_population=sum(total_population)) %>% arrange(desc(number_of_tracts))
num_tracts %>% group_by(placename, stateabbr) %>% summarize(number_of_tracts=n()) %>% ungroup() %>% summarise(median=median(number_of_tracts), low=min(number_of_tracts), hi=max(number_of_tracts), pct_25=quantile(number_of_tracts, probs=.25), pct_75=quantile(number_of_tracts, probs = .75),sd=sd(number_of_tracts)) %>% mutate(IQR = pct_75 -pct_25)
fh_acs_established_long <- fh_acs_established %>% cbind(fh_acs[non_na,"placename"])
names(fh_acs_established_long) <- c(new_names, "placename")
fh_acs_established_long <- fh_acs_established_long %>% gather('disease', 'prevalence', -placename)
fh_acs_established_long <- fh_acs_established_long %>% mutate(disease = fct_reorder(disease, prevalence, .fun='median'))
prevalence_p <- ggplot(fh_acs_established_long, aes(disease, prevalence*100))
prevalence_p <- prevalence_p + geom_violin()
prevalence_p <- prevalence_p + xlab('') + ylab('Prevalence') + coord_flip() + theme_fivethirtyeight() + theme(axis.title = element_text())
prevalence_p
DT::datatable(fh_acs_established_long %>% group_by(disease) %>% summarize(median=median(prevalence), low=min(prevalence), hi=max(prevalence), pct_25=quantile(prevalence, probs=.25), pct_75=quantile(prevalence, probs = .75), sd=sd(prevalence)) %>% arrange(desc(median)))
DT::datatable(fh_acs_established_long %>% group_by(disease, placename) %>% summarise(median=median(prevalence), low=min(prevalence), hi=max(prevalence), pct_25=quantile(prevalence, probs=.25), pct_75=quantile(prevalence, probs = .75), sd=sd(prevalence)) %>% arrange(desc(sd)))
summ_prev_per_city <- fh_acs_established_long %>% group_by(disease, placename) %>% summarise(median=median(prevalence), low=min(prevalence), hi=max(prevalence), pct_25=quantile(prevalence, probs=.25), pct_75=quantile(prevalence, probs = .75),sd=sd(prevalence)) %>% mutate(IQR = pct_75 -pct_25)
## plot the IQR vs. median prevalence per city
summ_prev_top_iqrs <- summ_prev_per_city %>% group_by(disease) %>% top_n(3, IQR) %>% ungroup()
p <- ggplot(summ_prev_per_city, aes(median, IQR))
p <- p + geom_point() + facet_wrap(~disease, nrow=3)
p <- p + geom_text_repel(data=summ_prev_top_iqrs, aes(median, IQR, label=placename), color='red', size=3)
p <- p + geom_point(data=summ_prev_top_iqrs, aes(median, IQR), color='red')
p <- p + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x='Median Prevalence in a City', y='75th - 25th Percentile of Prevalence in a City')
p
pc_established <- prcomp(fh_acs_established,center=T, scale.=T)
summary(pc_established)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0352 1.9013 0.74619 0.67495 0.5519 0.50395
## Proportion of Variance 0.6141 0.2410 0.03712 0.03037 0.0203 0.01693
## Cumulative Proportion 0.6141 0.8551 0.89227 0.92264 0.9429 0.95988
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.40954 0.38578 0.32818 0.25263 0.19010 0.18311
## Proportion of Variance 0.01118 0.00992 0.00718 0.00425 0.00241 0.00224
## Cumulative Proportion 0.97106 0.98098 0.98816 0.99241 0.99482 0.99706
## PC13 PC14 PC15
## Standard deviation 0.13607 0.11964 0.10621
## Proportion of Variance 0.00123 0.00095 0.00075
## Cumulative Proportion 0.99829 0.99925 1.00000
pc_established$rotation [, c(1,2)]
## PC1 PC2
## CANCER_CrudePrev 0.1577109 -0.42669445
## ARTHRITIS_CrudePrev 0.3001464 -0.12969807
## STROKE_CrudePrev 0.3132249 0.09335877
## CASTHMA_CrudePrev 0.1995424 0.29981442
## COPD_CrudePrev 0.3030841 0.11966747
## CHD_CrudePrev 0.3162948 -0.04589079
## DIABETES_CrudePrev 0.2960469 0.12676442
## KIDNEY_CrudePrev 0.3074523 0.07334282
## BPMED_CrudePrev 0.2581304 -0.21748557
## CSMOKING_CrudePrev 0.2108770 0.35609321
## BPHIGH_CrudePrev 0.3168123 0.02448109
## OBESITY_CrudePrev 0.2365472 0.30528754
## HIGHCHOL_CrudePrev 0.2699629 -0.21002118
## male_over_65_pct 0.1161608 -0.42473566
## female_over_65_pct 0.1387121 -0.41499353
fh_acs_established$pc_1 <- as.numeric(pc_established$x[, 1])
fh_acs_established$pc_2 <- as.numeric(pc_established$x[, 2])
placenames_cols <- c("stateabbr","placename" , "county_code", "state_code", "geoid","fips_place_tract","lat","long")
fh_acs_scoring <- fh_acs_established %>% cbind(fh_acs[non_na, placenames_cols])
fh_acs_scoring <- fh_acs_scoring %>% mutate(avg_over_65_pct = (male_over_65_pct+female_over_65_pct)/2)
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_pc_1=rescale(pc_1, to=c(0,100)))
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_pc_2=rescale(-pc_2, to=c(0,100))) # flip sign of pc2
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=rescale(risk_score_all, to=c(0,100)))
## load in the errors
comm_score_error <- read_rds('./scores/fh_acs_covid_comm_score_error.rds') # see error_simulation.R
fh_acs_scoring <- fh_acs_scoring %>% mutate(score_error=comm_score_error$score_error)
fh_acs_scoring %>% arrange(score_error) %>% dplyr::select(c(score_error, placename))
# show the top 5 tracts for the each axis
fh_acs_scoring_top <- fh_acs_scoring %>% top_n(10, risk_score_pc_1 ) %>%
rbind(fh_acs_scoring %>% top_n(10, risk_score_pc_2 )) %>% unite(place, placename, stateabbr, sep=",")
p <- ggplot(fh_acs_scoring, aes(risk_score_pc_1, risk_score_pc_2))
p <- p + geom_point(alpha=0.5, color='gray')
p <- p + geom_point(data=fh_acs_scoring_top, aes(risk_score_pc_1, risk_score_pc_2), color='red')
p <- p + geom_text_repel(data=fh_acs_scoring_top, aes(risk_score_pc_1, risk_score_pc_2, label=place), color='red',size=3)
p <- p + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x='Component 1 (61%)', y='Component 2 (24%)')
p
## now get the variation per city for the risk score
summarized_risk_score_by_city <- fh_acs_scoring %>% group_by(placename) %>% summarise(stateabbr=first(stateabbr), median=median(risk_score_both_pc), low=min(risk_score_both_pc), hi=max(risk_score_both_pc), pct_25=quantile(risk_score_both_pc, probs=.25), pct_75=quantile(risk_score_both_pc, probs = .75),sd=sd(risk_score_both_pc)) %>% mutate(IQR = pct_75 -pct_25)
summarized_risk_score_by_city_top_iqr <- summarized_risk_score_by_city %>% top_n(10, IQR)
summarized_risk_score_by_city_top_iqr <- summarized_risk_score_by_city_top_iqr %>% rbind(summarized_risk_score_by_city %>% filter(median >= 40, IQR > 10),summarized_risk_score_by_city %>% filter(median >= 50)) %>% unite(place, placename, stateabbr, sep=",")
p <- ggplot(summarized_risk_score_by_city, aes(median, IQR))
p <- p+ geom_point(alpha=.5)
p <- p + geom_text_repel(data=summarized_risk_score_by_city_top_iqr, aes(median, IQR, label=place), color='red', size=3)
p <- p + geom_point(data=summarized_risk_score_by_city_top_iqr, aes(median, IQR), color='red')
p <- p + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = '% Median C19 Risk Score in a City', y = 'Difference in IQR of C19 Risk Score in a City')
p
summarized_risk_score_by_city_top_iqr
ses_vars_to_bind <- setdiff(ses_vars, names(fh_acs_scoring))
fh_all <- cbind(fh_acs_scoring, fh_acs[non_na, ses_vars_to_bind])
top_per_state <- fh_acs_scoring %>% group_by(stateabbr) %>% top_n(1,risk_score_both_pc) %>% ungroup() %>% filter(risk_score_both_pc >= 75)
# top census tracts per state based on age, but filtered for those that have a high risk score
oldest_tracts <- fh_acs_scoring %>% group_by(stateabbr) %>% top_n(1,avg_over_65_pct) %>% ungroup() %>% filter(avg_over_65_pct >= .50)
top_both <- fh_acs_scoring %>% filter(risk_score_both_pc >= 75 & avg_over_65_pct >= .50)
p <- ggplot(fh_acs_scoring, aes(I(100*avg_over_65_pct), risk_score_both_pc))
p <- p + geom_point(alpha=0.5, color='gray')
p <- p + geom_point(data=top_per_state, aes(I(100*avg_over_65_pct), risk_score_both_pc))
p <- p + geom_text_repel(data=top_per_state, aes(I(100*avg_over_65_pct), risk_score_both_pc, label=paste(placename, stateabbr)), size=3)
p <- p + geom_point(data=oldest_tracts, aes(I(100*avg_over_65_pct), risk_score_both_pc), color='red')
p <- p + geom_text_repel(data=oldest_tracts, aes(I(100*avg_over_65_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p <- p + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = '% Age over 65', y = '')
p
top_score <- fh_all %>% group_by(stateabbr) %>% top_n(2, risk_score_both_pc) %>% filter(risk_score_both_pc >= 75)
p_income <- ggplot(fh_all, aes(median_income, risk_score_both_pc))
p_income <- p_income + geom_point(alpha=0.5, color='gray')
p_income <- p_income + geom_point(data=top_score, aes(median_income, risk_score_both_pc))
p_income <- p_income + geom_text_repel(data=top_score, aes(median_income, risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p_income <- p_income + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = 'Median Income per Tract', y = '')
p_income
## Warning: Removed 110 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_text_repel).
p_poverty <- ggplot(fh_all, aes(I(100*at_below_poverty_pct), risk_score_both_pc))
p_poverty <- p_poverty + geom_point(alpha=0.5, color='gray')
p_poverty <- p_poverty + geom_point(data=top_score, aes(I(100*at_below_poverty_pct), risk_score_both_pc))
p_poverty <- p_poverty + geom_text_repel(data=top_score, aes(I(100*at_below_poverty_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
high_poverty <- fh_all %>% group_by(stateabbr) %>% top_n(1, at_below_poverty_pct) %>% filter(at_below_poverty_pct >= .60, risk_score_both_pc > 50)
p_poverty <- p_poverty + geom_point(data=high_poverty, aes(I(100*at_below_poverty_pct), risk_score_both_pc))
p_poverty <- p_poverty + geom_text_repel(data=high_poverty, aes(I(100*at_below_poverty_pct), risk_score_both_pc, label=paste(placename, stateabbr)),color='red', size=3)
p_poverty <- p_poverty + theme_fivethirtyeight() + theme(axis.title = element_text(), legend.position = 'none') + labs(x = '% Below Poverty per Tract', y = '')
p_poverty
## Warning: Removed 44 rows containing missing values (geom_point).
set.seed(123)
fh_all_train_ind <- sample(nrow(fh_all), size=floor(nrow(fh_all)/2))
train <- fh_all[fh_all_train_ind,]
test <- fh_all[setdiff(1:nrow(fh_all), fh_all_train_ind), ]
mod_linear <- lm(risk_score_both_pc ~ scale(median_income) + scale(median_home_value) + scale(at_below_poverty_pct) + scale(unemployment_pct) + scale(non_employment_pct) + scale(less_than_high_school_pct) + scale(no_health_insurance_pct) + scale(more_than_one_occupant_per_room_pct) + scale(african_american_pct) + scale(hispanic_pct) + scale(asian_pct) + scale(other_race_pct), data=train)
test_prediction_lm <- tibble(predicted=predict(mod_linear, test), actual=test$risk_score_both_pc)
predicted_cases_index <- complete.cases(test_prediction_lm)
test_prediction_lm <- test_prediction_lm[predicted_cases_index, ]
summary(lm(predicted ~ actual, test_prediction_lm))
##
## Call:
## lm(formula = predicted ~ actual, data = test_prediction_lm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.092 -2.605 -0.394 2.044 41.771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.591745 0.156285 99.77 <2e-16 ***
## actual 0.542774 0.004482 121.09 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.228 on 13061 degrees of freedom
## Multiple R-squared: 0.5289, Adjusted R-squared: 0.5289
## F-statistic: 1.466e+04 on 1 and 13061 DF, p-value: < 2.2e-16
if(RUN_RF) {
mod_rf <- randomForest(risk_score_both_pc ~ median_income + unemployment_pct + median_home_value + at_below_poverty_pct + unemployment_pct + non_employment_pct + less_than_high_school_pct + no_health_insurance_pct + more_than_one_occupant_per_room_pct + african_american_pct + hispanic_pct + asian_pct + other_race_pct, data=train, na.action=na.roughfix, ntree=1000, importance=T)
importance(mod_rf)
test_prediction_rf <- tibble(predicted=predict(mod_rf, test), actual=test$risk_score_both_pc)
predicted_cases_index <- complete.cases(test_prediction_rf)
test_prediction_rf <- test_prediction_rf[predicted_cases_index, ]
summary(lm(predicted ~ actual, test_prediction_rf))
p <- ggplot(test_prediction_rf, aes(actual, predicted))
p <- p + geom_point(alpha=.1)
p
pred <- predict(mod_rf, fh_all)
fh_all <- fh_all %>% mutate(risk_score_both_pc_residual = risk_score_both_pc-pred)
if(WRITE_OUTPUT) {
fh_all_ses_residual <- fh_all %>% dplyr::select(fips_place_tract, geoid, placename, stateabbr, risk_score_both_pc, risk_score_both_pc_residual)
write_rds(fh_all_ses_residual, path = "fh_acs_ses_residual_covid_comm_score.rds")
write_csv(fh_all_ses_residual, path="fh_acs_ses_residual_covid_comm_score.csv")
}
}
fh_acs_scoring <- fh_acs_scoring %>% cbind(fh_acs[non_na, c('total_males', 'total_females')])
fh_acs_scoring <- fh_acs_scoring %>% mutate(risk_score = risk_score_both_pc)
fh_acs_scoring <- fh_acs_scoring %>% group_split(placename) %>% map(function(.x) { .x %>% mutate(rank_in_city=rank(-risk_score)) }) %>% bind_rows()
fh_acs_scoring <- fh_acs_scoring %>% group_split(stateabbr) %>% map(function(.x) { .x %>% mutate(rank_in_state=rank(-risk_score)) }) %>% bind_rows()
if(WRITE_OUTPUT) {
write_rds(fh_acs_scoring, path = "fh_acs_covid_comm_score.rds")
write_csv(fh_acs_scoring, path="fh_acs_covid_comm_score.csv")
}
fh_acs_scoring_city <- fh_acs_scoring %>% dplyr::select(-c(pc_1, pc_2, risk_score_pc_1, risk_score_pc_2, risk_score, risk_score_both_pc, risk_score_all, lat,long, geoid, fips_place_tract)) %>% unite(place_state, placename, stateabbr, sep="|")
fh_acs_scoring_city <- fh_acs_scoring_city %>% mutate(total_population = total_males + total_females)
cols_to_avg <- c(established_disease, established_risk)
cols_se <- c(established_disease_se, established_risk_se)
fh_acs_scoring_city <- fh_acs_scoring_city %>% cbind(fh_acs[non_na, c(established_disease_se, established_risk_se)])
cols_to_total <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
cols_total <- c("male_over_65_count","female_over_65_count","avg_over_65_count")
for(i in 1:length(cols_to_total)) {
fh_acs_scoring_city <- fh_acs_scoring_city %>% mutate(!!cols_total[i] := .data[[cols_to_total[i]]]*total_population)
}
fh_acs_scoring_by_city <- fh_acs_scoring_city %>% group_by(place_state) %>% summarise_at(c(cols_total, 'total_population'), sum)
cols_to_prev <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
for(i in 1:length(cols_total)) {
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(!!cols_to_prev[i] := .data[[cols_total[i]]]/total_population)
}
## now take weighted prevalence average
weighted_prev_avg <- function(d, prefix_col) {
# d is grouped by frame for a city or state
prev_col <- sprintf('%s_CrudePrev', prefix_col)
se_col <- sprintf('%s_SE', prefix_col)
ind <- complete.cases(d[, c(prev_col, se_col)])
d <- d[ind,]
weighted.mean(d[[prev_col]], 1/(d[[se_col]]+0.0005)) # note the fudge factor
}
weighted_prev_avgs <- function(d, prefix_cols) {
avgs <- prefix_cols %>% map_dbl(~weighted_prev_avg(d, .x))
place<- d[['place_state']][1]
tibble(disease=prefix_cols, prevalence=avgs, place_state=place)
}
prefixes <- c("CANCER", "ARTHRITIS", "STROKE","CASTHMA", "COPD", "CHD", "DIABETES","KIDNEY", "BPMED", "CSMOKING", "BPHIGH","OBESITY","HIGHCHOL" )
fh_acs_scoring_prevs_by_city <- fh_acs_scoring_city %>% group_by(place_state) %>% group_split(keep=TRUE) %>% map_df(~weighted_prev_avgs(.x, prefixes))
fh_acs_scoring_prevs_by_city$disease <- sprintf('%s_CrudePrev', fh_acs_scoring_prevs_by_city$disease)
fh_acs_scoring_prevs_by_city <- fh_acs_scoring_prevs_by_city %>% spread(disease, prevalence)
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% left_join(fh_acs_scoring_prevs_by_city, by='place_state')
pc_by_city <- prcomp(fh_acs_scoring_by_city[,established_cols],center=T, scale.=T)
summary(pc_by_city)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0342 1.7703 0.97630 0.71648 0.53455 0.5020
## Proportion of Variance 0.6138 0.2089 0.06354 0.03422 0.01905 0.0168
## Cumulative Proportion 0.6138 0.8227 0.88624 0.92046 0.93951 0.9563
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.42483 0.37164 0.3464 0.25315 0.22144 0.19245
## Proportion of Variance 0.01203 0.00921 0.0080 0.00427 0.00327 0.00247
## Cumulative Proportion 0.96834 0.97755 0.9856 0.98982 0.99309 0.99556
## PC13 PC14 PC15
## Standard deviation 0.17249 0.14290 0.12805
## Proportion of Variance 0.00198 0.00136 0.00109
## Cumulative Proportion 0.99755 0.99891 1.00000
pc_by_city$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev -0.1513625 -0.46234141 0.22821155 -0.11472234
## ARTHRITIS_CrudePrev -0.2940666 -0.10042677 0.32428060 -0.15763331
## STROKE_CrudePrev -0.3107515 0.09672029 -0.09092279 0.25688009
## CASTHMA_CrudePrev -0.1934841 0.21545877 0.63368651 0.23624056
## COPD_CrudePrev -0.3097551 0.05194417 0.15298565 0.07532731
## CHD_CrudePrev -0.3175390 -0.03559995 -0.07979953 0.14232241
## DIABETES_CrudePrev -0.2680392 0.19977554 -0.41492573 0.15166847
## KIDNEY_CrudePrev -0.2902240 0.10601242 -0.23259744 0.45053133
## BPMED_CrudePrev -0.2651378 -0.14699842 -0.08255760 -0.53277744
## CSMOKING_CrudePrev -0.2626526 0.24489948 0.24498121 -0.12100572
## BPHIGH_CrudePrev -0.3058207 0.07837667 -0.12820089 -0.16401360
## OBESITY_CrudePrev -0.2378426 0.31547582 0.02046824 -0.30149445
## HIGHCHOL_CrudePrev -0.2606948 -0.15744218 -0.30352090 -0.27935332
## male_over_65_pct -0.1434706 -0.48444226 -0.01096777 0.21290136
## female_over_65_pct -0.1607230 -0.46748113 0.03110201 0.22050634
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev 0.08392488 -0.07347842 0.25553415 -0.46669281
## ARTHRITIS_CrudePrev 0.10658331 -0.10064919 0.22599308 -0.12733324
## STROKE_CrudePrev -0.11393147 0.10683646 -0.13890118 -0.14884583
## CASTHMA_CrudePrev -0.34373332 -0.47370433 -0.04433591 0.13814565
## COPD_CrudePrev 0.33440598 0.16857389 -0.26693587 -0.07595834
## CHD_CrudePrev 0.27360824 0.12780107 0.02874123 -0.28674742
## DIABETES_CrudePrev -0.18302466 -0.09581671 0.09538506 -0.02628227
## KIDNEY_CrudePrev -0.08977368 -0.09208268 0.08173772 -0.22918005
## BPMED_CrudePrev -0.55368038 0.12104692 -0.36420795 -0.23807871
## CSMOKING_CrudePrev 0.31811780 0.34094732 -0.35361984 0.23671445
## BPHIGH_CrudePrev -0.20110174 -0.01062993 0.01577419 0.32652656
## OBESITY_CrudePrev -0.01310907 0.24021625 0.70054193 0.16737612
## HIGHCHOL_CrudePrev 0.38921737 -0.65747628 -0.10373285 0.24712102
## male_over_65_pct -0.06690911 0.19134865 0.12225284 0.38076066
## female_over_65_pct -0.13370052 0.16925387 -0.01007635 0.36074883
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev 0.072870493 -0.33914832 0.27547172 -0.23406289
## ARTHRITIS_CrudePrev 0.241497931 0.67528027 0.12988256 0.28288316
## STROKE_CrudePrev 0.212762616 -0.23073121 0.25143581 0.17967364
## CASTHMA_CrudePrev -0.145995946 -0.10206475 -0.12926353 -0.06412544
## COPD_CrudePrev 0.219679729 -0.10294665 -0.65236875 -0.05495016
## CHD_CrudePrev -0.099853188 0.05922997 -0.18073711 -0.13243167
## DIABETES_CrudePrev 0.005254134 0.43398080 -0.01451853 -0.22891985
## KIDNEY_CrudePrev -0.265574072 -0.13103901 0.16133076 0.17389213
## BPMED_CrudePrev -0.261258660 0.02080074 -0.15209884 0.09552718
## CSMOKING_CrudePrev -0.276916937 0.02510667 0.50793699 -0.02121700
## BPHIGH_CrudePrev 0.681418713 -0.23231460 0.14031844 -0.10110595
## OBESITY_CrudePrev -0.259482972 -0.20680412 -0.16221918 -0.04383570
## HIGHCHOL_CrudePrev -0.191505842 -0.11908380 0.00315688 0.03989486
## male_over_65_pct -0.087832405 -0.06557378 -0.12377680 0.59321163
## female_over_65_pct -0.141449238 0.17152905 0.01893415 -0.59193355
## PC13 PC14 PC15
## CANCER_CrudePrev 0.301953414 0.08927087 0.22890783
## ARTHRITIS_CrudePrev -0.237040114 -0.12640264 0.03568674
## STROKE_CrudePrev -0.486543461 0.56781851 0.02883620
## CASTHMA_CrudePrev 0.127599997 0.10395957 -0.12462931
## COPD_CrudePrev -0.003373466 -0.07894889 0.40162146
## CHD_CrudePrev 0.122546485 0.04612681 -0.78498727
## DIABETES_CrudePrev 0.470687683 0.32485392 0.26935319
## KIDNEY_CrudePrev -0.092139236 -0.62623112 0.16457916
## BPMED_CrudePrev -0.034577843 -0.06229959 -0.02928294
## CSMOKING_CrudePrev 0.236746526 -0.02143721 0.07471927
## BPHIGH_CrudePrev 0.157695741 -0.31052596 -0.20015988
## OBESITY_CrudePrev -0.165828195 0.07669304 0.06497547
## HIGHCHOL_CrudePrev -0.145504597 0.08690142 0.01766287
## male_over_65_pct 0.318050339 0.13286900 -0.01495440
## female_over_65_pct -0.350178782 -0.07011795 0.03949111
fh_acs_scoring_by_city$pc_1 <- as.numeric(pc_by_city$x[, 1])
fh_acs_scoring_by_city$pc_2 <- as.numeric(pc_by_city$x[, 2])
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_pc_1=as.numeric(rescale(-pc_1, to=c(0,100)))) # flip the sign
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_pc_2=as.numeric(rescale(-pc_2, to=c(0,100))))
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=as.numeric(rescale(risk_score_all, to=c(0,100))))
plot(fh_acs_scoring_by_city$risk_score_pc_1, fh_acs_scoring_by_city$risk_score_all)
plot(fh_acs_scoring_by_city$risk_score_pc_2, fh_acs_scoring_by_city$risk_score_all)
# add stateabbr
plot(fh_acs_scoring_by_city$risk_score_both_pc, fh_acs_scoring_by_city$avg_over_65_pct)
plot(fh_acs_scoring_by_city$risk_score_both_pc, fh_acs_scoring_by_city$DIABETES_CrudePrev)
fh_acs_scoring_by_city <- fh_acs_scoring_by_city %>% separate(place_state, c("placename", "stateabbr"), sep="\\|")
top_per_state <- fh_acs_scoring_by_city %>% group_by(stateabbr) %>% top_n(5,risk_score_pc_1) %>% ungroup()
DT::datatable(top_per_state %>% dplyr::select(stateabbr, placename, risk_score_pc_1, risk_score_both_pc, risk_score_all))
fh_acs_scoring_by_city_distribute <- fh_acs_scoring_by_city %>% dplyr::select(c(established_cols, "placename", "stateabbr", "risk_score_both_pc", "risk_score_pc_1", "risk_score_pc_2", "risk_score_all", "total_population"))
## Note: Using an external vector in selections is ambiguous.
## ℹ Use `all_of(established_cols)` instead of `established_cols` to silence this message.
## ℹ See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.
fh_acs_scoring_by_city_distribute <- fh_acs_scoring_by_city_distribute %>% mutate(risk_score = risk_score_both_pc)
fh_acs_scoring_by_city_distribute <- fh_acs_scoring_by_city_distribute %>% group_split(stateabbr) %>% map(function(.x) { .x %>% mutate(rank_in_state=rank(-risk_score)) }) %>% bind_rows()
if(WRITE_OUTPUT) {
write_rds(fh_acs_scoring_by_city, path = "fh_acs_covid_city_comm_score.rds")
write_csv(fh_acs_scoring_by_city_distribute, path="fh_acs_covid_city_comm_score.csv")
}
fh_acs_scoring_county <- fh_acs_scoring %>% dplyr::select(-c(pc_1, pc_2, risk_score_pc_1, risk_score_pc_2, risk_score_all, risk_score_both_pc, lat,long, geoid, fips_place_tract)) %>% mutate(total_population = total_males + total_females)
location_info <- fh_acs_scoring %>% dplyr::select(county_code, stateabbr) %>% unique() # go up a hierarchy
fh_acs_scoring_county <- fh_acs_scoring_county %>% cbind(fh_acs[non_na, c(established_disease_se, established_risk_se)])
county_counts <- fh_acs_scoring_county %>% group_by(county_code) %>% summarize(n=n())
fh_acs_scoring_county <- fh_acs_scoring_county %>% left_join(county_counts) %>% filter(n > 1)
## Joining, by = "county_code"
for(i in 1:length(cols_to_total)) {
fh_acs_scoring_county <- fh_acs_scoring_county %>% mutate(!!cols_total[i] := .data[[cols_to_total[i]]]*total_population)
}
fh_acs_scoring_by_county <- fh_acs_scoring_county %>% group_by(county_code) %>% summarise_at(c(cols_total, 'total_population'), sum)
for(i in 1:length(cols_total)) {
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(!!cols_to_prev[i] := .data[[cols_total[i]]]/total_population)
}
weighted_county_code_prev_avgs <- function(d, prefix_cols) {
avgs <- prefix_cols %>% map_dbl(~weighted_prev_avg(d, .x))
place<- d[['county_code']][1]
tibble(disease=prefix_cols, prevalence=avgs, county_code=place)
}
fh_acs_scoring_prevs_by_county <- fh_acs_scoring_county %>% group_by(county_code) %>% group_split(keep=TRUE) %>% map_df(~weighted_county_code_prev_avgs(.x, prefixes))
fh_acs_scoring_prevs_by_county$disease <- sprintf('%s_CrudePrev', fh_acs_scoring_prevs_by_county$disease)
fh_acs_scoring_prevs_by_county <- fh_acs_scoring_prevs_by_county %>% spread(disease, prevalence)
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% left_join(fh_acs_scoring_prevs_by_county, by='county_code')
heatmap.2(cor(fh_acs_scoring_by_county[,established_cols]))
pc_by_county <- prcomp(fh_acs_scoring_by_county[,established_cols],center=T, scale.=T)
summary(pc_by_county)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0683 1.6631 0.93637 0.74657 0.56938 0.55327
## Proportion of Variance 0.6276 0.1844 0.05845 0.03716 0.02161 0.02041
## Cumulative Proportion 0.6276 0.8120 0.87048 0.90763 0.92925 0.94965
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.47853 0.40564 0.33632 0.27247 0.2479 0.21505
## Proportion of Variance 0.01527 0.01097 0.00754 0.00495 0.0041 0.00308
## Cumulative Proportion 0.96492 0.97589 0.98343 0.98838 0.9925 0.99556
## PC13 PC14 PC15
## Standard deviation 0.17598 0.14248 0.12373
## Proportion of Variance 0.00206 0.00135 0.00102
## Cumulative Proportion 0.99763 0.99898 1.00000
pc_by_county$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev -0.1666402 -0.46710898 -0.12727428 0.23758525
## ARTHRITIS_CrudePrev -0.2902344 -0.10349410 -0.27079042 0.30362465
## STROKE_CrudePrev -0.3065678 0.10300399 0.03877146 -0.30512916
## CASTHMA_CrudePrev -0.1906044 0.19933650 -0.71060290 -0.12545798
## COPD_CrudePrev -0.3050213 0.05025504 -0.16770384 -0.02522198
## CHD_CrudePrev -0.3123990 -0.03988041 0.06446970 -0.13628887
## DIABETES_CrudePrev -0.2784138 0.19377115 0.32685305 -0.25976200
## KIDNEY_CrudePrev -0.2920983 0.09514468 0.14000965 -0.46110621
## BPMED_CrudePrev -0.2712243 -0.08809253 0.19144072 0.34971923
## CSMOKING_CrudePrev -0.2553653 0.24179824 -0.25999076 0.15589505
## BPHIGH_CrudePrev -0.2972654 0.10760894 0.17220335 0.08043558
## OBESITY_CrudePrev -0.2349413 0.30978832 0.09604647 0.32432325
## HIGHCHOL_CrudePrev -0.2688750 -0.13348753 0.30635232 0.30006038
## male_over_65_pct -0.1427766 -0.50051949 -0.03218733 -0.21389231
## female_over_65_pct -0.1708718 -0.47741351 -0.08535878 -0.22173714
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev -0.217197418 0.26148317 -0.10081538 0.42762361
## ARTHRITIS_CrudePrev -0.113200438 0.09646505 0.16711323 0.06266107
## STROKE_CrudePrev -0.009377157 -0.01500191 -0.15709016 0.09305968
## CASTHMA_CrudePrev -0.434083544 -0.27028419 0.13168005 -0.07095784
## COPD_CrudePrev 0.209145940 0.28647467 -0.13146871 -0.22161814
## CHD_CrudePrev 0.051017308 0.34623163 -0.11330264 0.20334670
## DIABETES_CrudePrev -0.137591318 -0.08406118 0.06437783 0.03313377
## KIDNEY_CrudePrev -0.172878229 0.09849564 -0.04250791 0.21406198
## BPMED_CrudePrev -0.097418752 -0.56310734 -0.58525826 0.09706909
## CSMOKING_CrudePrev 0.527681173 0.16012193 -0.30666530 -0.21809934
## BPHIGH_CrudePrev -0.027674320 -0.27840922 0.20824528 -0.35326231
## OBESITY_CrudePrev 0.309981549 -0.13935792 0.52767195 0.49132355
## HIGHCHOL_CrudePrev -0.344718169 0.30091911 0.21312741 -0.46981273
## male_over_65_pct 0.324901371 -0.23090688 0.26346865 -0.12385887
## female_over_65_pct 0.217135134 -0.21633114 0.10298491 -0.05370117
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev -0.160096403 0.42065782 -0.090210874 0.065688156
## ARTHRITIS_CrudePrev -0.262912439 -0.61250516 -0.385182227 -0.050418232
## STROKE_CrudePrev -0.176366306 0.26316364 -0.147829009 -0.207427780
## CASTHMA_CrudePrev 0.212887732 0.09933891 0.146680817 -0.020580787
## COPD_CrudePrev -0.283817815 -0.10392786 0.681535675 0.049684335
## CHD_CrudePrev -0.001090552 -0.16300058 0.143841559 0.012083721
## DIABETES_CrudePrev 0.029372901 -0.35102120 -0.125031911 0.248568010
## KIDNEY_CrudePrev 0.185609722 0.08170092 -0.116066313 -0.140859362
## BPMED_CrudePrev 0.127861127 -0.12190631 0.179796674 -0.096108903
## CSMOKING_CrudePrev 0.290501158 0.16103908 -0.430378527 0.021049794
## BPHIGH_CrudePrev -0.580849785 0.35365074 -0.149304675 0.110814705
## OBESITY_CrudePrev 0.176941340 0.12613164 0.206371197 0.006478071
## HIGHCHOL_CrudePrev 0.446212140 0.11483896 0.053395650 -0.074216004
## male_over_65_pct 0.061794076 -0.07979618 0.023375429 -0.607072615
## female_over_65_pct 0.201480946 0.01310754 -0.003369048 0.685130788
## PC13 PC14 PC15
## CANCER_CrudePrev -0.29982048 0.20020110 0.166767159
## ARTHRITIS_CrudePrev 0.29137577 -0.01775561 0.071809041
## STROKE_CrudePrev 0.45430470 0.45389030 -0.435315813
## CASTHMA_CrudePrev -0.17126174 -0.02650641 -0.110546878
## COPD_CrudePrev 0.06989649 0.19403670 0.287434518
## CHD_CrudePrev -0.24054627 -0.52407160 -0.567008450
## DIABETES_CrudePrev -0.52494052 0.44582495 0.059742263
## KIDNEY_CrudePrev 0.22905717 -0.34963881 0.580545810
## BPMED_CrudePrev 0.04540168 -0.07655242 0.022340101
## CSMOKING_CrudePrev -0.18419399 0.01636916 0.092399450
## BPHIGH_CrudePrev -0.12971666 -0.32045102 0.030812750
## OBESITY_CrudePrev 0.08756811 0.03935685 -0.003459127
## HIGHCHOL_CrudePrev 0.12946752 0.07607271 -0.085251772
## male_over_65_pct -0.23730476 0.05099024 0.022145883
## female_over_65_pct 0.25524112 -0.02870006 -0.053249529
fh_acs_scoring_by_county$pc_1 <- as.numeric(pc_by_county$x[, 1])
fh_acs_scoring_by_county$pc_2 <- as.numeric(pc_by_county$x[, 2])
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_pc_1=as.numeric(rescale(-pc_1,to=c(0,100))))
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_pc_2=as.numeric(rescale(-pc_2,to=c(0,100))))
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring_by_county <- fh_acs_scoring_by_county %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=as.numeric(rescale(risk_score_all, to=c(0,100))))
plot(fh_acs_scoring_by_county$risk_score_pc_1, fh_acs_scoring_by_county$risk_score_all)
plot(fh_acs_scoring_by_county$risk_score_pc_2, fh_acs_scoring_by_county$risk_score_all)
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county %>% dplyr::select(c(established_cols, "county_code", "risk_score_both_pc", "risk_score_pc_1", "risk_score_pc_2", "risk_score_all", "total_population"))
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county_distribute %>% mutate(risk_score = risk_score_both_pc)
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county_distribute %>% left_join(location_info)
## Joining, by = "county_code"
fh_acs_scoring_by_county_distribute <- fh_acs_scoring_by_county_distribute %>% group_split(stateabbr) %>% map(function(.x) { .x %>% mutate(rank_in_state=rank(-risk_score)) }) %>% bind_rows()
DT::datatable(fh_acs_scoring_by_county_distribute %>% dplyr::select(county_code, stateabbr, risk_score, risk_score_pc_1, risk_score_all))
if(WRITE_OUTPUT) {
write_rds(fh_acs_scoring_by_county, path = "fh_acs_covid_county_comm_score.rds")
write_csv(fh_acs_scoring_by_county_distribute, path="fh_acs_covid_county_comm_score.csv")
}
fh_acs_scoring_state <- fh_acs_scoring %>% dplyr::select(-c(pc_1, pc_2, risk_score_pc_1, risk_score_pc_2, risk_score_all, risk_score_both_pc, lat,long, geoid, fips_place_tract)) %>% mutate(total_population = total_males + total_females)
fh_acs_scoring_state <- fh_acs_scoring_state %>% cbind(fh_acs[non_na, c(established_disease_se, established_risk_se)])
cols_to_total <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
cols_total <- c("male_over_65_count","female_over_65_count","avg_over_65_count")
for(i in 1:length(cols_to_total)) {
fh_acs_scoring_state <- fh_acs_scoring_state %>% mutate(!!cols_total[i] := .data[[cols_to_total[i]]]*total_population)
}
fh_acs_scoring_by_state <- fh_acs_scoring_state %>% group_by(stateabbr) %>% summarise_at(c(cols_total, 'total_population'), sum)
cols_to_prev <- c("male_over_65_pct","female_over_65_pct","avg_over_65_pct")
for(i in 1:length(cols_total)) {
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(!!cols_to_prev[i] := .data[[cols_total[i]]]/total_population)
}
weighted_state_prev_avgs <- function(d, prefix_cols) {
avgs <- prefix_cols %>% map_dbl(~weighted_prev_avg(d, .x))
place<- d[['stateabbr']][1]
tibble(disease=prefix_cols, prevalence=avgs, stateabbr=place)
}
fh_acs_scoring_prevs_by_state <- fh_acs_scoring_state %>% group_by(stateabbr) %>% group_split(keep=TRUE) %>% map_df(~weighted_state_prev_avgs(.x, prefixes))
fh_acs_scoring_prevs_by_state$disease <- sprintf('%s_CrudePrev', fh_acs_scoring_prevs_by_state$disease)
fh_acs_scoring_prevs_by_state <- fh_acs_scoring_prevs_by_state %>% spread(disease, prevalence)
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% left_join(fh_acs_scoring_prevs_by_state, by='stateabbr')
heatmap.2(cor(fh_acs_scoring_by_state[,established_cols]))
pc_by_state <- prcomp(fh_acs_scoring_by_state[,established_cols],center=T, scale.=T)
summary(pc_by_state)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 3.0624 1.6261 0.93665 0.84338 0.60304 0.55470
## Proportion of Variance 0.6252 0.1763 0.05849 0.04742 0.02424 0.02051
## Cumulative Proportion 0.6252 0.8015 0.85998 0.90740 0.93164 0.95216
## PC7 PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.42119 0.40141 0.35899 0.26226 0.24022 0.20958
## Proportion of Variance 0.01183 0.01074 0.00859 0.00459 0.00385 0.00293
## Cumulative Proportion 0.96398 0.97473 0.98332 0.98790 0.99175 0.99468
## PC13 PC14 PC15
## Standard deviation 0.18279 0.15936 0.1450
## Proportion of Variance 0.00223 0.00169 0.0014
## Cumulative Proportion 0.99691 0.99860 1.0000
pc_by_state$rotation
## PC1 PC2 PC3 PC4
## CANCER_CrudePrev 0.10849184 -0.48687692 0.31398024 -0.411886788
## ARTHRITIS_CrudePrev 0.28762056 -0.17192838 0.25955907 -0.197225365
## STROKE_CrudePrev 0.29568787 0.16693564 -0.03297050 0.230510634
## CASTHMA_CrudePrev 0.20051809 0.05812432 0.69740669 0.425880543
## COPD_CrudePrev 0.30897788 -0.02911972 0.16850702 -0.046970195
## CHD_CrudePrev 0.30775724 -0.05746680 0.06694467 -0.135046388
## DIABETES_CrudePrev 0.29745592 0.16297635 -0.20301709 0.171436753
## KIDNEY_CrudePrev 0.28920461 0.13182034 -0.09141990 0.335045426
## BPMED_CrudePrev 0.27306413 -0.03869105 -0.27696012 0.006104727
## CSMOKING_CrudePrev 0.28252688 0.14581369 0.07492955 -0.172516093
## BPHIGH_CrudePrev 0.30551845 0.09775451 -0.17117986 0.014660662
## OBESITY_CrudePrev 0.26438155 0.23866718 -0.01158984 -0.344739791
## HIGHCHOL_CrudePrev 0.26508121 -0.07842503 -0.28536429 -0.290631223
## male_over_65_pct 0.09406325 -0.54440342 -0.23571636 0.238328022
## female_over_65_pct 0.13719840 -0.51210848 -0.12401379 0.323614019
## PC5 PC6 PC7 PC8
## CANCER_CrudePrev -0.092800184 0.17138413 -0.036601197 0.33053106
## ARTHRITIS_CrudePrev 0.122650282 -0.12151074 -0.071701223 0.12127297
## STROKE_CrudePrev -0.115817383 0.29426838 -0.119429075 0.13270871
## CASTHMA_CrudePrev 0.120709997 -0.37897765 0.038021789 -0.06411735
## COPD_CrudePrev 0.074447044 0.15528084 0.254328731 -0.16032606
## CHD_CrudePrev -0.002075941 0.39780839 0.198677079 0.09173957
## DIABETES_CrudePrev 0.168051107 0.06790125 0.008225568 0.16780350
## KIDNEY_CrudePrev 0.093091476 0.33141343 -0.013481235 0.26613298
## BPMED_CrudePrev -0.544064878 -0.47452712 0.436704927 0.32519894
## CSMOKING_CrudePrev -0.434077909 0.15799066 0.072228542 -0.70237792
## BPHIGH_CrudePrev 0.129793475 -0.23110414 -0.277026208 -0.09955023
## OBESITY_CrudePrev -0.125653438 -0.21319431 -0.650209620 0.11167196
## HIGHCHOL_CrudePrev 0.618208470 -0.21857354 0.277584039 -0.19754738
## male_over_65_pct -0.041764415 0.09842596 -0.299529602 -0.18383121
## female_over_65_pct -0.070108924 -0.14958462 -0.105783421 -0.16408911
## PC9 PC10 PC11 PC12
## CANCER_CrudePrev -0.33847058 0.27385133 -0.022327365 0.33731906
## ARTHRITIS_CrudePrev 0.22244333 -0.60346181 0.406571091 -0.22591943
## STROKE_CrudePrev -0.16795671 0.08438731 0.529350080 0.07680340
## CASTHMA_CrudePrev -0.19024150 0.07708237 -0.090885615 -0.03016615
## COPD_CrudePrev 0.43576304 0.43458153 -0.231497062 -0.26849595
## CHD_CrudePrev 0.32665747 -0.06317301 -0.062038440 -0.04336513
## DIABETES_CrudePrev 0.16708007 -0.16881185 -0.162410783 0.39273395
## KIDNEY_CrudePrev -0.38074297 -0.11567382 -0.305453027 -0.14729176
## BPMED_CrudePrev -0.05992350 0.04338464 0.006375307 -0.15078108
## CSMOKING_CrudePrev -0.25668548 -0.17977944 -0.008194147 0.14487659
## BPHIGH_CrudePrev 0.14631682 0.48295176 0.390584986 0.18808809
## OBESITY_CrudePrev 0.02950784 -0.03225527 -0.410628122 -0.12963899
## HIGHCHOL_CrudePrev -0.36860676 -0.03898386 -0.036720263 -0.01577031
## male_over_65_pct -0.13471336 0.08823552 0.032653899 -0.51681016
## female_over_65_pct 0.23728386 -0.18776488 -0.211935685 0.46391065
## PC13 PC14 PC15
## CANCER_CrudePrev 0.02199290 0.11447485 0.130167303
## ARTHRITIS_CrudePrev -0.15251017 0.26895818 0.078774664
## STROKE_CrudePrev -0.28876303 -0.52508602 0.134373204
## CASTHMA_CrudePrev 0.22722459 -0.14497817 -0.030962493
## COPD_CrudePrev -0.39636904 0.05755014 0.296967872
## CHD_CrudePrev 0.46525605 -0.27770811 -0.512941666
## DIABETES_CrudePrev 0.39440097 0.09678108 0.592727304
## KIDNEY_CrudePrev -0.23994099 0.42936186 -0.271286432
## BPMED_CrudePrev 0.03432547 -0.01059645 -0.002745499
## CSMOKING_CrudePrev 0.06213576 0.16059531 0.051847316
## BPHIGH_CrudePrev 0.12684531 0.40721317 -0.293562171
## OBESITY_CrudePrev -0.07203648 -0.25448972 -0.019739381
## HIGHCHOL_CrudePrev -0.07019707 -0.25693202 -0.041457898
## male_over_65_pct 0.32997346 -0.02800812 0.209073162
## female_over_65_pct -0.33825768 -0.14276478 -0.216611910
fh_acs_scoring_by_state$pc_1 <- as.numeric(pc_by_state$x[, 1])
fh_acs_scoring_by_state$pc_2 <- as.numeric(pc_by_state$x[, 2])
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_pc_1=as.numeric(rescale(pc_1,to=c(0,100))))
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_pc_2=as.numeric(rescale(-pc_2,to=c(0,100))))
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_both_pc=rescale(risk_score_pc_1*.61 + risk_score_pc_2*.24, to=c(0,100))) # rescale according the the contribution of the PC
fh_acs_scoring_by_state <- fh_acs_scoring_by_state %>% mutate(risk_score_all=rowSums(.[established_cols])) %>% mutate(risk_score_all=as.numeric(rescale(risk_score_all,to=c(0,100))))
fh_acs_scoring_by_state_distribute <- fh_acs_scoring_by_state %>% dplyr::select(c(established_cols, "stateabbr", "risk_score_both_pc", "risk_score_pc_1", "risk_score_pc_2", "risk_score_all", "total_population"))
fh_acs_scoring_by_state_distribute <- fh_acs_scoring_by_state_distribute %>% mutate(rank_in_country=rank(risk_score_both_pc)) %>% mutate(risk_score = risk_score_both_pc)
DT::datatable(fh_acs_scoring_by_state_distribute %>% dplyr::select(stateabbr, risk_score,risk_score_all))
if(WRITE_OUTPUT) {
write_rds(fh_acs_scoring_by_state, path = "fh_acs_covid_state_comm_score.rds")
write_csv(fh_acs_scoring_by_state_distribute, path="fh_acs_covid_state_comm_score.csv")
}